Chapter 4 Community composition

load("data/data.Rdata")

4.1 Taxonomy overview

4.1.1 Stacked barplot

# Stacked barplot of relative abundances: one bar per sample, filled by phylum,
# with panels nested by Location and Origin.
genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # long format: one row per genome x sample
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  filter(count > 0) %>% # drop genome x sample pairs with zero abundance
  # grouping by phylum keeps the same sorting of taxonomic units in every bar
  ggplot(aes(x = sample, y = count, fill = phylum, group = phylum)) +
    # stacked bars with thin white borders between segments
    geom_bar(stat = "identity", colour = "white", linewidth = 0.1) +
    scale_fill_manual(values = phylum_colors) +
    # facet per location and origin
    facet_nested(. ~ Location + Origin, scales = "free") +
    guides(fill = guide_legend(ncol = 1)) +
    theme(
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
      axis.title.x = element_blank(),
      panel.background = element_blank(),
      panel.border = element_blank(),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      axis.line = element_line(
        linewidth = 0.5, linetype = "solid", colour = "black"
      )
    ) +
    labs(fill = "Phylum", y = "Relative abundance", x = "Samples")

4.1.2 Phylum relative abundances

# Relative abundance of each phylum in each sample
# (one row per sample x phylum).
phylum_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  # sum genome-level abundances within each phylum; return an ungrouped table
  # so later steps do not inherit a leftover per-sample grouping
  summarise(relabun = sum(count), .groups = "drop")

# Mean and standard deviation of relative abundance per phylum across samples,
# sorted from most to least abundant and rendered as a table.
phylum_summary %>%
    group_by(phylum) %>%
    summarise(
      mean = mean(relabun, na.rm = TRUE),
      sd = sd(relabun, na.rm = TRUE)
    ) %>%
    arrange(desc(mean)) %>%
    tt()
tinytable_f6ahdwr9kpbfbcvx497s
phylum mean sd
Bacteroidota 0.3174419111 0.183019959
Bacillota_A 0.1950249672 0.120311961
Actinomycetota 0.1816628641 0.164661070
Pseudomonadota 0.0906110310 0.129035667
Campylobacterota 0.0753140001 0.122740598
Bacillota_C 0.0657125307 0.090589426
Bacillota 0.0558322249 0.084087344
Fusobacteriota 0.0169238273 0.036464401
Desulfobacterota 0.0012635896 0.004283801
Cyanobacteria 0.0002130539 0.001747414
# Phylum names ordered by decreasing mean relative abundance; used to order
# the y axis and the colour vector of the phylum plot.
phylum_arrange <- phylum_summary %>%
    group_by(phylum) %>%
    # na.rm = TRUE matches how the phylum summary table computes its means
    summarise(mean = mean(relabun, na.rm = TRUE)) %>%
    arrange(desc(mean)) %>%
    pull(phylum)

# Jitter plot of per-sample relative abundance for each phylum.
phylum_summary %>%
    filter(phylum %in% phylum_arrange) %>%
    # reversed levels put the first entry of phylum_arrange at the top
    mutate(phylum = factor(phylum, levels = rev(phylum_arrange))) %>%
    ggplot(aes(x = relabun, y = phylum, group = phylum, color = phylum)) +
        geom_jitter(alpha = 0.5) +
        # colours are looked up by phylum name
        scale_color_manual(values = phylum_colors[rev(phylum_arrange)]) +
        labs(y = "Phylum", x = "Relative abundance") +
        theme_minimal() +
        theme(legend.position = "none")

4.2 Taxonomy boxplot

4.2.1 Family

# Relative abundance of each family in each sample
# (one row per sample x family).
family_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # long format: one row per genome x sample
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, family) %>%
  # sum genome-level abundances within each family; return an ungrouped table
  # so later steps do not inherit a leftover per-sample grouping
  summarise(relabun = sum(count), .groups = "drop")

# Mean and standard deviation of relative abundance per family across samples,
# sorted from most to least abundant and rendered as a table.
family_summary %>%
    group_by(family) %>%
    summarise(
      mean = mean(relabun, na.rm = TRUE),
      sd = sd(relabun, na.rm = TRUE)
    ) %>%
    arrange(desc(mean)) %>%
    tt()
tinytable_q9ykrn1mvg94ydzdxbcp
family mean sd
Bacteroidaceae 2.957679e-01 0.1794702282
Lachnospiraceae 1.072346e-01 0.0913528566
Coriobacteriaceae 9.631433e-02 0.1009342181
Helicobacteraceae 5.410633e-02 0.1022560566
Megasphaeraceae 3.895540e-02 0.0732948425
Enterobacteriaceae 3.671005e-02 0.1156492986
Bifidobacteriaceae 3.634821e-02 0.0642309973
Succinivibrionaceae 3.534985e-02 0.0599537024
Ruminococcaceae 3.408006e-02 0.0348433297
Actinomycetaceae 3.080463e-02 0.0930244958
Campylobacteraceae 2.120767e-02 0.0374825084
Lactobacillaceae 2.056464e-02 0.0600942736
Fusobacteriaceae 1.692383e-02 0.0364644007
Clostridiaceae 1.655096e-02 0.0573573568
Burkholderiaceae 1.538244e-02 0.0157737872
Atopobiaceae 1.510044e-02 0.0393574894
Erysipelotrichaceae 1.498819e-02 0.0297202782
Porphyromonadaceae 1.231678e-02 0.0490743570
Oscillospiraceae 1.169099e-02 0.0154660823
Dialisteraceae 1.109249e-02 0.0168027270
Peptoniphilaceae 1.027007e-02 0.0377623762
Streptococcaceae 1.009343e-02 0.0465284432
Selenomonadaceae 9.051718e-03 0.0152538557
Acidaminococcaceae 6.612925e-03 0.0134491243
Erysipelatoclostridiaceae 6.598637e-03 0.0107938056
Peptostreptococcaceae 6.477379e-03 0.0101260977
Anaerovoracaceae 4.239616e-03 0.0103079362
Tannerellaceae 3.414585e-03 0.0079108669
Pasteurellaceae 2.963621e-03 0.0184648739
Enterococcaceae 2.804985e-03 0.0085239000
Mycobacteriaceae 2.785187e-03 0.0190576068
Rikenellaceae 2.616836e-03 0.0106713550
Acutalibacteraceae 2.198544e-03 0.0044303862
Marinifilaceae 2.086026e-03 0.0055223355
Butyricicoccaceae 1.463315e-03 0.0031506247
Desulfovibrionaceae 1.263590e-03 0.0042838013
Muribaculaceae 9.291851e-04 0.0040885460
UBA660 6.460679e-04 0.0035734633
Anaerotignaceae 5.353989e-04 0.0025028446
Barnesiellaceae 3.105707e-04 0.0011701232
UMGS124 3.100703e-04 0.0017862741
Gastranaerophilaceae 2.130539e-04 0.0017474140
CAG-508 2.052995e-04 0.0007194205
CAG-239 2.050753e-04 0.0018010322
CAG-826 1.362811e-04 0.0007776869
UBA1381 7.878665e-05 0.0007556950
# Family names ordered by decreasing mean relative abundance; used to pick
# and order the families shown in the family plots.
family_arrange <- family_summary %>%
    group_by(family) %>%
    # use the mean (the column was previously named `mean` but held a sum) so
    # the ordering follows the same statistic as the family summary table
    summarise(mean = mean(relabun, na.rm = TRUE)) %>%
    arrange(desc(mean)) %>%
    pull(family)

# Per origin: relative abundance of the 20 top-ranked families in
# family_arrange, one panel per Origin, points coloured by phylum.
family_summary %>%
    # attach the phylum of each family so points can be coloured by phylum
    left_join(
      genome_metadata %>% select(family, phylum) %>% distinct(),
      by = join_by(family)
    ) %>%
    left_join(sample_metadata, by = join_by(sample)) %>%
    # head() never pads with NA when fewer than 20 families are available
    filter(family %in% head(family_arrange, 20)) %>%
    # reversed levels put the top-ranked family at the top of the y axis
    mutate(family = factor(family, levels = rev(head(family_arrange, 20)))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x = relabun, y = family, group = family, color = phylum)) +
        # phylum_colors is indexed by phylum name elsewhere in this chapter, so
        # colours are matched by name here rather than by dropping a hard-coded
        # position from the vector
        scale_color_manual(values = phylum_colors) +
        geom_jitter(alpha = 0.5) +
        facet_grid(. ~ Origin) +
        theme_minimal() +
        labs(y = "Family", x = "Relative abundance", color = "Phylum")

# Per location: relative abundance of the 20 top-ranked families in
# family_arrange, one panel per Location, points coloured by phylum.
family_summary %>%
    # attach the phylum of each family so points can be coloured by phylum
    left_join(
      genome_metadata %>% select(family, phylum) %>% distinct(),
      by = join_by(family)
    ) %>%
    left_join(sample_metadata, by = join_by(sample)) %>%
    # head() never pads with NA when fewer than 20 families are available
    filter(family %in% head(family_arrange, 20)) %>%
    # reversed levels put the top-ranked family at the top of the y axis
    mutate(family = factor(family, levels = rev(head(family_arrange, 20)))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x = relabun, y = family, group = family, color = phylum)) +
        # phylum_colors is indexed by phylum name elsewhere in this chapter, so
        # colours are matched by name here rather than by dropping a hard-coded
        # position from the vector
        scale_color_manual(values = phylum_colors) +
        geom_jitter(alpha = 0.5) +
        facet_grid(. ~ Location) +
        theme_minimal() +
        labs(y = "Family", x = "Relative abundance", color = "Phylum")

4.2.2 Genus

# Relative abundance of each genus in each sample (one row per
# sample x phylum x genus), excluding genomes without a genus annotation.
genus_summary <- genome_counts_filt %>%
  # TSS normalisation: divide every non-genome column by its column total
  mutate(across(-genome, ~ .x / sum(.x))) %>%
  # long format: one row per genome x sample
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, phylum, genus) %>%
  # sum genome-level abundances within each genus; return an ungrouped table
  # so later steps do not inherit a leftover sample/phylum grouping
  summarise(relabun = sum(count), .groups = "drop") %>%
  filter(genus != "g__") %>% # drop the bare "g__" placeholder (no genus name)
  mutate(genus = sub("^g__", "", genus)) # strip the "g__" rank prefix

# Mean and standard deviation of relative abundance per genus across samples,
# sorted from most to least abundant.
genus_summary_sort <- genus_summary %>%
    group_by(genus) %>%
    summarise(
      mean = mean(relabun, na.rm = TRUE),
      sd = sd(relabun, na.rm = TRUE)
    ) %>%
    arrange(desc(mean))

# Render the sorted genus summary as a table.
genus_summary_sort %>%
    tt()
tinytable_7ny6h0rgvoyxu47ftcoy
genus mean sd
Prevotella 1.364856e-01 0.1353999322
Collinsella 9.631433e-02 0.1009342181
Phocaeicola 8.993433e-02 0.0955977240
Bacteroides 5.173169e-02 0.0635714050
Megasphaera 3.869704e-02 0.0730748863
Helicobacter_B 3.657334e-02 0.0843378196
Bifidobacterium 3.634821e-02 0.0642309973
Anaerobiospirillum 3.459305e-02 0.0599454952
Escherichia 3.275523e-02 0.1083759749
Roseburia 2.314992e-02 0.0533913900
Campylobacter_D 2.120767e-02 0.0374825084
Blautia_A 2.051642e-02 0.0231346773
Pauljensenia 1.977626e-02 0.0800352518
Negativibacillus 1.819490e-02 0.0215815280
Prevotellamassilia 1.738141e-02 0.0343949273
Helicobacter_A 1.713427e-02 0.0507397988
Clostridium_P 1.645501e-02 0.0573123071
Fusobacterium_B 1.469492e-02 0.0345732902
Sutterella 1.425550e-02 0.0152994499
Blautia 1.333921e-02 0.0183592066
UBA7748 1.330938e-02 0.0382333948
Porphyromonas_A 1.231678e-02 0.0490743570
Trueperella 1.102837e-02 0.0501260208
Ligilactobacillus 1.038602e-02 0.0327671906
Dialister 1.031903e-02 0.0157533129
Streptococcus 9.357277e-03 0.0446998121
Gemmiger 8.872488e-03 0.0146460260
Holdemanella 8.793459e-03 0.0187067258
Lactobacillus 8.261034e-03 0.0348551627
Clostridium_Q 7.967312e-03 0.0129501748
Ruminococcus_B 7.491864e-03 0.0133624741
Megamonas 6.998825e-03 0.0128982165
Peptacetobacter 6.477379e-03 0.0101260977
Catenibacterium 6.451673e-03 0.0106973049
Faecalimonas 6.214440e-03 0.0112084591
Faecalibacterium 6.206186e-03 0.0133252461
Bulleidia 6.127964e-03 0.0165765313
Lawsonibacter 5.743270e-03 0.0087015993
Peptoniphilus_A 4.928663e-03 0.0217722683
Acidaminococcus 4.738436e-03 0.0131157898
Eisenbergiella 4.302829e-03 0.0073261825
Plesiomonas 3.954812e-03 0.0176151160
Parabacteroides 3.414585e-03 0.0079108669
Catenibacillus 3.242799e-03 0.0054534742
Histophilus 2.963621e-03 0.0184648739
Alistipes 2.616836e-03 0.0106713550
CAG-81 2.576496e-03 0.0041736848
Mediterraneibacter 2.497312e-03 0.0073570318
Sellimonas 2.293821e-03 0.0042562326
Dysosmobacter 2.255402e-03 0.0038273521
Fusobacterium_A 2.228906e-03 0.0084702675
Mitsuokella 2.052893e-03 0.0089367483
Ruminococcus_A 2.042994e-03 0.0028573373
Corynebacterium 1.947467e-03 0.0162046636
CAG-317 1.827371e-03 0.0032549462
Parolsenella 1.791055e-03 0.0042478478
Odoribacter 1.786646e-03 0.0054424090
Phascolarctobacterium_A 1.773898e-03 0.0045007766
UMGS905 1.691812e-03 0.0039135118
Limosilactobacillus 1.637804e-03 0.0065016504
Flavonifractor 1.526253e-03 0.0030043128
S5-A14a 1.461682e-03 0.0086194900
CAG-110 1.454019e-03 0.0077541828
VUNA01 1.401454e-03 0.0054991752
Butyricicoccus 1.357370e-03 0.0031145926
Enterococcus_B 1.310733e-03 0.0059365102
Schaedlerella 1.305216e-03 0.0026356164
Desulfovibrio 1.146069e-03 0.0040502698
Enterocloster 1.081652e-03 0.0016014566
CAG-521 1.044864e-03 0.0044915406
Robinsoniella 1.000496e-03 0.0022500094
CAG-279 9.291851e-04 0.0040885460
Enterococcus_E 8.894621e-04 0.0052314621
Fusicatenibacter 8.503089e-04 0.0040305315
Lawsonella 8.377194e-04 0.0044649755
Allisonella 7.734581e-04 0.0017636201
Lactococcus 7.361496e-04 0.0045553482
UMGS1370 7.113596e-04 0.0019148297
Lachnospira 6.870854e-04 0.0021298406
Dorea_B 6.868715e-04 0.0019244447
Bariatricus 6.260243e-04 0.0020974371
UBA9502 6.094947e-04 0.0010769033
Enterococcus 6.047896e-04 0.0038126660
UMGS1472 5.927633e-04 0.0010326554
Emergencia 5.805635e-04 0.0012817363
Succinivibrio 5.598203e-04 0.0022589496
Anaerotignum 5.353989e-04 0.0025028446
CAG-877 5.310297e-04 0.0030198616
Hungatella_A 5.159539e-04 0.0024856275
Evtepia 4.179293e-04 0.0010548717
Eubacterium_M 3.872451e-04 0.0020920297
Ruminococcus_C 3.818769e-04 0.0021783041
Anaerobutyricum 3.713931e-04 0.0007790163
Barnesiella 3.105707e-04 0.0011701232
Butyricimonas 2.993807e-04 0.0011821610
UMGS1872 2.941129e-04 0.0019462982
Mobilibacterium 2.870924e-04 0.0013257840
Fournierella 2.796504e-04 0.0011213974
Eubacterium_H 2.754780e-04 0.0018906167
Caecibacter 2.583579e-04 0.0011470773
Clostridium_A 2.543803e-04 0.0019730949
Helicobacter_C 2.498036e-04 0.0018283330
Paraprevotella 2.348920e-04 0.0018824059
Latilactobacillus 2.222007e-04 0.0021312745
Dorea_A 2.150652e-04 0.0015260870
Zag111 2.130539e-04 0.0017474140
CAG-354 2.052995e-04 0.0007194205
CAG-495 2.050753e-04 0.0018010322
Anaerobiospirillum_A 1.969800e-04 0.0010117016
Peptoniphilus_C 1.797556e-04 0.0017241550
Eubacterium_G 1.536509e-04 0.0010434640
Helicobacter_D 1.489164e-04 0.0009489548
Erysipelatoclostridium 1.469635e-04 0.0005908793
Phocea 1.449534e-04 0.0004735835
Ruminococcus_E 1.382618e-04 0.0012752336
UBA4855 1.362811e-04 0.0007776869
CAG-145 1.215784e-04 0.0004454832
Mailhella 1.175204e-04 0.0005162449
CAG-988 1.150383e-04 0.0008178711
Marseille-P4683 1.140905e-04 0.0005092329
AM07-15 1.059455e-04 0.0004745690
Phascolarctobacterium 1.005920e-04 0.0007816980
Clostridium 9.595775e-05 0.0005366982
Anaerostipes 8.894740e-05 0.0004351045
Parasutterella 8.207495e-05 0.0007872353
CAG-41 7.878665e-05 0.0007556950
Absicoccus 6.676847e-05 0.0002502435
Levilactobacillus 5.757608e-05 0.0005522504
# Genus names ordered by decreasing mean relative abundance.
# genus_summary has already dropped the bare "g__" placeholder and stripped
# the "g__" prefix, so those steps are not repeated here.
genus_arrange <- genus_summary %>%
    group_by(genus) %>%
    # use the mean (the column was previously named `mean` but held a sum) so
    # the ordering follows the same statistic as genus_summary_sort
    summarise(mean = mean(relabun, na.rm = TRUE)) %>%
    arrange(desc(mean)) %>%
    pull(genus)

# Per origin: relative abundance of every genus, one panel per Origin,
# points coloured by phylum.
genus_summary %>%
    left_join(sample_metadata, by = join_by(sample)) %>%
    # reversed levels put the genus with the highest mean at the top
    mutate(
      genus = factor(genus, levels = rev(genus_summary_sort %>% pull(genus)))
    ) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
        scale_color_manual(values = phylum_colors) +
        geom_jitter(alpha = 0.5) +
        facet_grid(. ~ Origin) +
        theme_minimal() +
        # the y axis shows genera, so label it "Genus" (was "Family")
        labs(y = "Genus", x = "Relative abundance", color = "Phylum")